home *** CD-ROM | disk | FTP | other *** search
Java Source | 2002-03-28 | 6.6 KB | 238 lines |
- // ------------------------------------------------------------------
- //
- // Purpose.............: UrlReplaceDownload
- // Created.............: May 8, 2001
- // Copyright...........: (c) 2001 by Adobe Systems
- //
- // ------------------------------------------------------------------
-
- import java.io.*;
- import java.util.ResourceBundle;
- import java.util.Locale;
- import java.net.URL;
- import java.net.HttpURLConnection;
- import java.net.URLConnection;
- import org.apache.regexp.*;
-
- /**
- * This class perform download URL and save to localdisk with replacing links.
- * For example:
- * <pre>
- * UrlReplaceDownload urlReplaceDownload = new UrlReplaceDownload(savePath);
- * urlReplaceDownload.replaceDownload(urlList);
- * </pre>
- *
- * @author Adobe Systems, Inc.
- * @since 1.0
- */
- public class UrlReplaceDownload {
- /** Saving directory path */
- static String savePath;
-
- /**
- * Constructor with no parameter
- * @param void
- * @return void
- */
- public UrlReplaceDownload() {
- savePath = "." + File.separatorChar;
- }
-
- /**
- * Constructor with saving directory path parameter
- * @param path Set saving directory path
- * @return void
- */
- public UrlReplaceDownload(String path) {
- savePath = path;
- }
-
- /**
- * Set saving directory path
- * @param path Set saving directory path
- * @return void
- */
- public void setSavePath(String path) {
- savePath = path;
- }
-
- /**
- * Download URL from list and save to local disk
- * @param urlList URL and saving filename list
- * @return int the number of downloaded count
- * @see UrlList
- */
- public int replaceDownload(UrlList urlList) throws IOException {
- BufferedOutputStream bos = null;
- BufferedInputStream bis = null;
- HttpURLConnection http = null;
-
- int downloadedCount = 0;
- ResourceBundle rb = ResourceBundle.getBundle("PageGenerator");
- print(rb.getString("urlCount") + urlList.count);
- for (int i = 0; i < urlList.count; i++) {
- try {
- String outputFilePath = savePath + urlList.filePath[i] + urlList.file[i];
-
- File f = new File(savePath + urlList.filePath[i]);
- f.mkdirs();
-
- bos = new BufferedOutputStream(new FileOutputStream(outputFilePath));
- URL url = new URL(urlList.url[i].url);
- print("URL = " + urlList.url[i].url);
- print(" =>" + outputFilePath);
-
- http = (HttpURLConnection)url.openConnection();
- bis = new BufferedInputStream(http.getInputStream());
-
- byte[] b = new byte[32768];
- int size;
- String fname = urlList.file[i].toUpperCase();
- if (fname.indexOf(".HTM") == -1) { // non-HTML page
- while ((size = bis.read(b)) != -1) {
- bos.write(b, 0, size);
- }
- } else { // HTML page
- String buf = "";
- while ((size = bis.read(b)) != -1) {
- buf += new String(b, 0, size);
- }
- String replaced = replace(buf, urlList, i);
- bos.write(replaced.getBytes());
- }
- bos.flush();
- downloadedCount++;
- } catch(Exception e) {
- if (http != null) {
- int errCode = -1;
- String errMsg = rb.getString("serverDidNotRespond");
- try {
- errCode = http.getResponseCode();
- errMsg = http.getResponseMessage();
- }
- catch (Exception xe) {}
- System.err.println(rb.getString("httpError") + errCode + " " + errMsg);
- } else {
- e.printStackTrace();
- }
- } finally {
- try {
- if (bos != null) bos.close();
- } catch(Exception e) {
- e.printStackTrace();
- }
- try {
- if (bis != null) bis.close();
- } catch(Exception e) {
- e.printStackTrace();
- }
- try {
- if (http != null) http.disconnect();
- } catch(Exception e) {
- e.printStackTrace();
- }
- bos = null;
- bis = null;
- http = null;
- }
- }
- return downloadedCount;
- }
-
- /**
- * replace URL to filename
- * @param html Downloaded HTML content
- * @param urlList UrlList
- * @param listIndex Current downloading index in UrlList
- * @return Replaced HTML content string
- * @see UrlList
- */
- private String replace(String html, UrlList urlList, int listIndex) {
- int index = 0;
- int hrefIndex = 0;
- int startUrlIndex = 0;
- int endQuoteIndex = 0;
-
- RE reQuoted = null;
-
- String href = "href=";
- int hrefLen = href.length();
-
- for (index = 0; index < html.length() - hrefLen; index++) {
- String comp = html.substring(index, index+hrefLen);
- if (0 != href.compareToIgnoreCase(comp)) continue;
- int quotation = 0;
- switch (html.charAt(index+hrefLen)) {
- case '\'':
- quotation = 1;
- startUrlIndex = index + hrefLen + 1;
- endQuoteIndex = html.indexOf('\'', startUrlIndex);
- break;
- case '\"':
- quotation = 2;
- startUrlIndex = index + hrefLen + 1;
- endQuoteIndex = html.indexOf('\"', startUrlIndex);
- break;
- default:
- quotation = 0;
- startUrlIndex = index + hrefLen;
- int candidate = 0;
- int temp = 0;
- if (-1 != (temp = html.indexOf(' ', startUrlIndex))) {
- candidate = temp;
- }
- if (-1 != (temp = html.indexOf('>', startUrlIndex))) {
- candidate = Math.min(candidate, temp);
- }
- endQuoteIndex = -1;
- if (candidate != 0) { // check if there is wrong char mixed
- if (-1 != (temp = html.indexOf('<', startUrlIndex))) {
- if (temp < candidate) break;
- }
- if (-1 != (temp = html.indexOf('\'', startUrlIndex))) {
- if (temp < candidate) break;
- }
- if (-1 != (temp = html.indexOf('\"', startUrlIndex))) {
- if (temp < candidate) break;
- }
- endQuoteIndex = candidate; // it passed all check now
- }
- break;
- }
- if (endQuoteIndex == -1) { // quote ending was not found
- ResourceBundle rb = ResourceBundle.getBundle("PageGenerator");
- System.err.println(rb.getString("endQuoteNotFound"));
- continue;
- }
- String orgUrl = html.substring(startUrlIndex, endQuoteIndex);
- if (orgUrl.length() < 5) { // href="URL": URL is too short.
- continue;
- }
-
- String replaceFilePath = urlList.getReplaceFilePath(listIndex, orgUrl);
- if (replaceFilePath == "") {
- // No replacing file path found"
- continue;
- }
- String newUrl = "\"" + replaceFilePath + "\"";
- try {
- if (quotation == 1) reQuoted = REUtil.createRE("\'" + orgUrl + "\'", RE.MATCH_MULTILINE);
- else reQuoted = REUtil.createRE("\"" + orgUrl + "\"", RE.MATCH_MULTILINE);
- } catch (RESyntaxException e) {
- e.printStackTrace();
- }
- html = reQuoted.subst(html, newUrl);
- }
- return html;
- }
-
- /**
- * Display string
- * @param s String to display
- * @return void
- */
- private static void print(String s) {
- System.out.println(s);
- }
- }
-